
% This takes raw files created in my stata do-file.
clear 

% N is number of price states, X is number of exporters, C is number of
% cities, M is the number of importers.

delete('Ryan');
diary('Ryan');

cd ../china/data
load hs10_indlist.raw;
indN=length(hs10_indlist);

for z=1:50

folder=sprintf('../china/estimation/hs10/biggest/raw_%d',hs10_indlist(z,1))

cd (folder)
load cities.raw;
% X x 1 vector- the city of each exporter
load citydiff.raw;
% C x 1 vector- the average price increase (from time 0 to 1) in each city.
load statepts.raw;
% N+1 x 1 vector- with N price states, there are N+1 price interval points.
load price_minyear.raw;
% X x 1 vector- the average (time 0) price at each exporter.
load w.raw;
% M x 2 vector of realized w shocks
    % 1st Column: which exporter the shocks are for.
    % 2nd Column: the realization of the shocks.
% I still compute the variance of their shocks though.
load imp.raw;
% M x 3 matrix of observed choices


%imp=[imp; imp; imp];
%imp1=[(1:5)'; 4];
%imp(:,1)=repmat(imp1,10,1);


N=length(statepts)-1;
X=length(price_minyear);
C=length(citydiff);
M=length(imp);
S=N*X*X; % Total number of states

%% Create the State Space
% There are N price states, X exporters, and thus X choices of future exporter.
% Thus we have N*X*X total states.
% The order is for each past x, for each future x, there is a price.

p=(1:N)';
p=repmat(p,X*X,1);

x=1:X;
x=repmat(x,N,1);
x=x(:);
x=repmat(x,X,1);

xm1=1:X;
xm1=repmat(xm1,N*X,1);
xm1=xm1(:);

% Connect on the cities using our city information.
c=zeros(S,1);
cm1=zeros(S,1);
for i=1:S
    for j=1:X
        if x(i,1)==j
            c(i,1)=cities(j,1);
        end
        if xm1(i,1)==j
            cm1(i,1)=cities(j,1);
        end
    end
end

% Connect on the quality
lambda=zeros(S,1);
for i=1:S
    for j=1:X
        if x(i,1)==j
            lambda(i,1)=cities(j,2);
        end
    end
end





% Switching indicator
switched=zeros(S,1);
for i=1:S
    if xm1(i,1)~=x(i,1)
        switched(i,1)=1;
    end
end


% For the actual data
switched_d=zeros(M,1);
for i=1:M
    if imp(i,2)~=imp(i,3)
	switched_d(i,1)=1;
    end
end

c_d=zeros(M,1);
cm1_d=zeros(M,1);
for i=1:M
    for j=1:X
        if imp(i,3)==j
            c_d(i,1)=cities(j,1);
        end
        if imp(i,2)==j
            cm1_d(i,1)=cities(j,1);
        end
    end
end

city_switched_d=zeros(M,1);
for i=1:M
    if cm1_d(i,1)~=c_d(i,1)
        city_switched_d(i,1)=1;
    end
end

save (['facts'],'imp','switched_d','city_switched_d','-ascii')



% City switching indicator
city_switched=zeros(S,1);
for i=1:S
    if cm1(i,1)~=c(i,1)
        city_switched(i,1)=1;
    end
end

[p x xm1 c cm1];

%% Define the expected price vector

% Create the representative points (midpoint) for each price state
stateval=zeros(N,1);
for i=1:N
    stateval(i,1)=(statepts(i,1)+statepts(i+1,1) ) / 2;
end

price_stateval=repmat(stateval,X*X,1);


% Connect the average city price increase to the (future) city choice
av_pricediff=zeros(S,1);
for i=1:S
    for j=1:C
        if c(i,1)==j
            av_pricediff(i,1)=citydiff(j,1);
        end
    end
end
% With a choice of city c, prices are expected to increase by this amount.

% Define the expected price vector
ep=zeros(S,1);
for i=1:S
    % If you stayed with your partner, your expected price is the midpoint
    % of the state you were in plus the increae in city prices 
    
    % Though seemingly unsatisfying, we have to avoid any case where two
    % importers have different prices but are in the same state for the
    % value function FP equation to make sense.
    if switched(i,1) == 0
        ep(i,1) = price_stateval(i,1) + av_pricediff(i,1);
    end
    % If you switched, then your expected price is the average price at the
    % guy you switched to, plus the price increase in that city.
    
    % Importantly, this doesn't vary based on previous conditions (x and
    % p) as the above does, and thus we don't have to use the midpoint of
    % the state interval.
    if switched(i,1) == 1
        for j=1:X
            if x(i,1)==j
                ep(i,1) = price_minyear(j,1) + av_pricediff(i,1); %+ i/2;
            end
        end
    end
end

[p x xm1 ep];

%% Define the price probabilty vector

% First compute the variance of the w shocks for each exporter.

% This is a real pain, but I managed to compute the variance of each w
% depending on each unique exporter.

% w(:,1) is the exporter, and w(:,2) is the realized w value.
% For this example w(:,1)=[1 2 2 2 3 3]', w(:,2)=[1.5 -3.5 1.5 1.5 -1.5 2]'

[C,ia,ic] = unique(w(:,1),'first');
% ia is a vector that tells me the first appearance of each unique exporter.
% For example, for w(:,1)=[1 2 2 2 3 3]', ia=[1 2 5]'.
ia;
g=length(ia);
% Next I make separate vectors for each, using the realized value of each.
% I use my computed first occurrence values from ia
% Following the example above, A1=[1.5], A2=[-3.5 1.5 1.5]', A3=[-1.5 2]'
for i=1:g-1
    eval(sprintf('A%d = w(ia(i) : ia(i+1) - 1,2)',i));
end
eval(sprintf('A%d=w(ia(g):end,2)',g));

% Finally calculate the variance of each of these vectors, and save in an X
% x 1 vector.
var_w=zeros(X,1);
for i=1:X
    var_w(i,1)=eval(sprintf('var(A%d)',i));
end

%var_v(1,1)=0;
%var_v(2,1)=0;

TransProbs=zeros(S*N,1);

TransProbsP=p';
TransProbsP=repmat(TransProbsP,N,1);
TransProbsP=TransProbsP(:);

TransProbsPNew=[1:N]';
TransProbsPNew=repmat(TransProbsPNew,X*X*N,1);

TransProbsEP=ep';
TransProbsEP=repmat(TransProbsEP,N,1);
TransProbsEP=TransProbsEP(:);

TransProbsPUB=statepts(2:N+1,1);
TransProbsPUB=repmat(TransProbsPUB,X*X*N,1);

TransProbsPLB=statepts(1:N,1);
TransProbsPLB=repmat(TransProbsPLB,X*X*N,1);

TransProbsx=x';
TransProbsx=repmat(TransProbsx,N,1);
TransProbsx=TransProbsx(:);

TransProbsVarx=zeros(S*N,1);
for i=1:S*N
    for j=1:X
        if TransProbsx(i,1)==j
            TransProbsVarx(i,1)=var_w(j,1);
        end
    end
end

for i=1:S*N
    TransProbs(i,1)=normcdf(TransProbsPUB(i,1)-TransProbsEP(i,1),0,sqrt(TransProbsVarx(i,1)))...
        - normcdf(TransProbsPLB(i,1)-TransProbsEP(i,1),0,sqrt(TransProbsVarx(i,1)));
    if TransProbsPUB(i,1)==statepts(N+1,1)
        TransProbs(i,1)=1-normcdf(TransProbsPLB(i,1)-TransProbsEP(i,1),0,sqrt(TransProbsVarx(i,1)));
    end
    if TransProbsPLB(i,1)==statepts(1,1)
        TransProbs(i,1)=normcdf(TransProbsPUB(i,1)-TransProbsEP(i,1),0,sqrt(TransProbsVarx(i,1)));
    end
end


[TransProbs TransProbsP TransProbsPNew TransProbsx TransProbsVarx TransProbsEP TransProbsPUB TransProbsPLB];

%% Put the actual data in this format
impprice=imp(:,1);
impxm1=imp(:,2);
impxm0=imp(:,3);

statenum=[1:S]';

impchoicestate=zeros(M,1);

for h=1:M
    for i=1:N
        if impprice(h,1)==i
            for j=1:X
                if impxm0(h,1)==j
                    for k=1:X
                        if impxm1(h,1)==k
                            impchoicestate(h,1)=statenum((k-1)*X*N+(j-1)*N+i,1);
                        end
                    end
                end
            end
        end
    end
end
                            

save ('data2_q_biggest', 'TransProbs', 'ep', 'cities','impchoicestate','X','N','M','switched','city_switched','lambda');   


z

cd ../china/estimation/hs10
end

diary off;